package cn.spark.study.sql;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

/**
 * Demonstrates common DataFrame operations: displaying rows, selecting
 * columns, computing on a column, filtering, and grouping with a count.
 *
 * <p>The input is a JSON file read from HDFS; the code below relies on it
 * having at least the columns {@code name} and {@code age}.
 *
 * @author jun.zhang6
 * @date 2020/11/12
 */
public class DataFrameOperation {
    /** Location of the JSON file the demo loads into a DataFrame. */
    private static final String STUDENTS_JSON_PATH = "hdfs://ymm1:9000/students.json";

    /**
     * Runs the demo: loads the JSON file into a DataFrame and prints the
     * result of several queries to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("DataFrameOperation");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Stop the context even when one of the actions below fails, so the
        // application does not rely on JVM teardown to release its resources.
        try {
            SQLContext sqlContext = new SQLContext(sc);

            DataFrame df = sqlContext.read().json(STUDENTS_JSON_PATH);

            // Print the DataFrame's rows. Note: per the Spark API, show()
            // displays only the leading rows (20 by default), not all of them.
            df.show();

            // Select a single column.
            df.select(df.col("name")).show();

            // Select several columns and compute on one of them (age + 1).
            df.select(df.col("name"), df.col("age").plus(1)).show();

            // Keep only the rows whose age is greater than 18.
            df.filter(df.col("age").gt(18)).show();

            // Group by age, then count the rows in each group.
            df.groupBy(df.col("age")).count().show();
        } finally {
            sc.stop();
        }
    }
}
